{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pandas version 0.22.0\n",
      "Numpy version 1.13.3\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "# Third-party imports.\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Show up to 100 columns when a DataFrame is displayed.\n",
    "pd.set_option('display.max_columns', 100)\n",
    "\n",
    "# Report the library versions in use.\n",
    "print(f'Pandas version {pd.__version__}')\n",
    "print(f'Numpy version {np.__version__}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the tab-separated log file, decoding it as ISO-8859-1.\n",
    "data = pd.read_csv(\"irclog.tsv\", sep=\"\\t\", encoding=\"ISO-8859-1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>channel</th>\n",
       "      <th>day</th>\n",
       "      <th>nick</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>line</th>\n",
       "      <th>spam</th>\n",
       "      <th>in_summary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>#dvn</td>\n",
       "      <td>2012-12-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1355005146</td>\n",
       "      <td>iqlogbot joined #dvn</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>#dvn</td>\n",
       "      <td>2012-12-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1355005248</td>\n",
       "      <td>Topic for #dvn is now http://thedata.org - The...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>#dvn</td>\n",
       "      <td>2012-12-08</td>\n",
       "      <td>pdurbin</td>\n",
       "      <td>1355005351</td>\n",
       "      <td>hello! welcome to #dvn, an IRC channel on Free...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>#dvn</td>\n",
       "      <td>2012-12-08</td>\n",
       "      <td>pdurbin</td>\n",
       "      <td>1355005459</td>\n",
       "      <td>our website is http://thedata.org and we're st...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>#dvn</td>\n",
       "      <td>2012-12-08</td>\n",
       "      <td>pdurbin</td>\n",
       "      <td>1355005517</td>\n",
       "      <td>we call our project DVN for short :)</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id channel         day     nick   timestamp  \\\n",
       "0   1    #dvn  2012-12-08      NaN  1355005146   \n",
       "1   2    #dvn  2012-12-08      NaN  1355005248   \n",
       "2   3    #dvn  2012-12-08  pdurbin  1355005351   \n",
       "3   4    #dvn  2012-12-08  pdurbin  1355005459   \n",
       "4   5    #dvn  2012-12-08  pdurbin  1355005517   \n",
       "\n",
       "                                                line  spam  in_summary  \n",
       "0                               iqlogbot joined #dvn     0           0  \n",
       "1  Topic for #dvn is now http://thedata.org - The...     0           0  \n",
       "2  hello! welcome to #dvn, an IRC channel on Free...     0           0  \n",
       "3  our website is http://thedata.org and we're st...     0           0  \n",
       "4               we call our project DVN for short :)     0           0  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 92847 entries, 0 to 92846\n",
      "Data columns (total 8 columns):\n",
      "id            92847 non-null int64\n",
      "channel       92847 non-null object\n",
      "day           92847 non-null object\n",
      "nick          60116 non-null object\n",
      "timestamp     92847 non-null int64\n",
      "line          92845 non-null object\n",
      "spam          92847 non-null int64\n",
      "in_summary    92847 non-null int64\n",
      "dtypes: int64(4), object(4)\n",
      "memory usage: 5.7+ MB\n"
     ]
    }
   ],
   "source": [
    "# Print each column's non-null count and dtype, plus the frame's memory usage.\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['id', 'channel', 'day', 'nick', 'timestamp', 'line', 'spam', 'in_summary']"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column labels as a plain Python list.\n",
    "data.columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "#dataverse    82587\n",
       "#dvn          10260\n",
       "Name: channel, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many rows belong to each channel.\n",
    "data['channel'].value_counts()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
